// $Id: scanner.cpp,v 1.5 1999/02/17 19:07:57 shields Exp $
//
// This software is subject to the terms of the IBM Jikes Compiler
// License Agreement available at the following URL:
// http://www.ibm.com/research/jikes.
// Copyright (C) 1996, 1998, International Business Machines Corporation
// and others. All Rights Reserved.
// You must accept the terms of that agreement to use this software.
//
#include "config.h"
#include "scanner.h"
#include "control.h"
#include "error.h"
int (*Scanner::scan_keyword[13]) (wchar_t *p1) =
{
    ScanKeyword0,
    ScanKeyword0,
    ScanKeyword2,
    ScanKeyword3,
    ScanKeyword4,
    ScanKeyword5,
    ScanKeyword6,
    ScanKeyword7,
    ScanKeyword8,
    ScanKeyword9,
    ScanKeyword10,
    ScanKeyword0,
    ScanKeyword12
};
//
// The constructor initializes all utility variables.
//
Scanner::Scanner(Control &control_) : control(control_)
{
    //
    // If this assertion fails, the Token structure in stream.h must be redesigned !!!
    //
    assert(NUM_TERMINALS < 128);
    //
    // -------------------------------------------------------------------------------
    // We are pulling this code out because we are tired of defending it. We
    // thought it was obvious that either $ should not have been used for compiler-
    // generated variables or that users should not be allowed to use it in
    // variable names...
    // -------------------------------------------------------------------------------
    //
    // For version 1.1 or above, a $ may not be used as part of an identifier name
    // unless the user specifically requests that it be allowed.
    //
    // if (control.option.one_one && (! control.option.dollar))
    //     Code::SetBadCode(U_DOLLAR);
    //
    //
    // CLASSIFY_TOKEN is a mapping from each character into a
    // classification routine that is invoked when that character
    // is the first character encountered in a token.
    //
    for (int c = 0; c < 128; c++)
    {
        if (Code::IsAlpha(c))
            classify_token[c] = &Scanner::ClassifyId;
        else if (Code::IsDigit(c))
            classify_token[c] = &Scanner::ClassifyNumericLiteral;
        else classify_token[c] = &Scanner::ClassifyBadToken;
    }
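
    //
    // Slot 128 is the catch-all entry used by Scan() for any character whose
    // code point is 128 or higher. The entries below override the generic
    // handlers installed above: the letters that can begin a Java keyword are
    // routed to ClassifyIdOrKeyword, and each quote, punctuation or operator
    // character gets its own classification routine.
    //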
    classify_token[128] = &Scanner::ClassifyNonAsciiUnicode;

    classify_token[U_a] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_b] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_c] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_d] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_e] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_f] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_g] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_i] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_l] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_n] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_p] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_r] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_s] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_t] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_v] = &Scanner::ClassifyIdOrKeyword;
    classify_token[U_w] = &Scanner::ClassifyIdOrKeyword;

    classify_token[U_SINGLE_QUOTE] = &Scanner::ClassifyCharLiteral;
    classify_token[U_DOUBLE_QUOTE] = &Scanner::ClassifyStringLiteral;

    classify_token[U_PLUS] = &Scanner::ClassifyPlus;
    classify_token[U_MINUS] = &Scanner::ClassifyMinus;
    classify_token[U_EXCLAMATION] = &Scanner::ClassifyNot;
    classify_token[U_PERCENT] = &Scanner::ClassifyMod;
    classify_token[U_CARET] = &Scanner::ClassifyXor;
    classify_token[U_AMPERSAND] = &Scanner::ClassifyAnd;
    classify_token[U_STAR] = &Scanner::ClassifyStar;
    classify_token[U_BAR] = &Scanner::ClassifyOr;
    classify_token[U_TILDE] = &Scanner::ClassifyComplement;
    classify_token[U_SLASH] = &Scanner::ClassifySlash;
    classify_token[U_GREATER] = &Scanner::ClassifyGreater;
    classify_token[U_LESS] = &Scanner::ClassifyLess;
    classify_token[U_LEFT_PARENTHESIS] = &Scanner::ClassifyLparen;
    classify_token[U_RIGHT_PARENTHESIS] = &Scanner::ClassifyRparen;
    classify_token[U_LEFT_BRACE] = &Scanner::ClassifyLbrace;
    classify_token[U_RIGHT_BRACE] = &Scanner::ClassifyRbrace;
    classify_token[U_LEFT_BRACKET] = &Scanner::ClassifyLbracket;
    classify_token[U_RIGHT_BRACKET] = &Scanner::ClassifyRbracket;
    classify_token[U_SEMICOLON] = &Scanner::ClassifySemicolon;
    classify_token[U_QUESTION] = &Scanner::ClassifyQuestion;
    classify_token[U_COLON] = &Scanner::ClassifyColon;
    classify_token[U_COMMA] = &Scanner::ClassifyComma;
    classify_token[U_DOT] = &Scanner::ClassifyPeriod;
    classify_token[U_EQUAL] = &Scanner::ClassifyEqual;

    return;
}
//
// Associate a lexical stream with this file
//
void Scanner::Initialize(FileSymbol *file_symbol)
{
    lex = new LexStream(control, file_symbol);
    lex -> Reset();

    LexStream::Token *current_token = &(lex -> token_stream.Next()); // add 0th token !
    current_token -> SetKind(0);
    current_token -> SetLocation(0);
    current_token -> SetSymbol(NULL);

    if (control.option.comments)
    {
        LexStream::Comment *current_comment = &(lex -> comment_stream.Next()); // add 0th comment !
        current_comment -> string = NULL;
        current_comment -> length = 0;
        current_comment -> previous_token = -1; // No token precedes this comment
        current_comment -> location = 0;
    }

    lex -> line_location.Next() = 0; // mark starting location of line # 0

    return;
}
//
// This is one of the main entry points for the Java lexical analyser.
// Its input is the name of a regular text file. Its output is a stream
// of tokens.
//
void Scanner::SetUp(FileSymbol *file_symbol)
{
    Initialize(file_symbol);

    lex -> CompressSpace();

    file_symbol -> lex_stream = lex;

    return;
}
//
// This is one of the main entry points for the Java lexical analyser.
// Its input is the name of a regular text file. Its output is a stream
// of tokens.
//
void Scanner::Scan(FileSymbol *file_symbol)
{
    Initialize(file_symbol);

    lex -> ReadInput();

    cursor = lex -> InputBuffer();
    if (cursor)
    {
        Scan();

        lex -> CompressSpace();
        //
        // If error dumping was requested, sort the diagnostics collected so
        // far and print them in emacs-style format.
        //
        if (control.option.dump_errors)
        {
            lex -> SortMessages();
            for (int i = 0; i < lex -> bad_tokens.Length(); i++)
                lex -> PrintEmacsMessage(i);
            cout.flush();
        }

        lex -> DestroyInput(); // get rid of input buffer
    }
    else // no input buffer was obtained (e.g., the file could not be read)
    {
        delete lex;
        lex = NULL;
    }

    file_symbol -> lex_stream = lex;

    return;
}
//
// Scan the InputBuffer() and process all tokens and comments.
//
void Scanner::Scan()
{
    wchar_t *input_buffer_tail = &cursor[lex -> InputBufferLength()];

    //
    // CURSOR is assumed to point to the next character to be scanned.
    // Using CURSOR, we jump to the proper classification function
    // which scans and classifies the token and returns the location of
    // the character immediately following it.
    //
    do
    {
        SkipSpaces();
        (this ->* classify_token[*cursor < 128 ? *cursor : 128])();
    } while (cursor < input_buffer_tail);

    //
    // Add a gate after the last line.
    //
    lex -> line_location.Next() = input_buffer_tail - lex -> InputBuffer();

    //
    // If the brace_stack is not empty, then there are unmatched left
    // braces in the input. Each unmatched left brace should point to
    // the EOF token as a substitute for a matching right brace.
    //
    for (LexStream::TokenIndex left_brace = brace_stack.Top(); left_brace; left_brace = brace_stack.Top())
    {
        lex -> token_stream[left_brace].SetRightBrace(lex -> token_stream.Length() - 1);
        brace_stack.Pop();
    }

    return;
}
//
// CURSOR points to the s